*Dataset with singles*

*Load data*
* Open the person-year dataset, discarding whatever is currently in memory.
* Forward slashes are used as path separators because Stata accepts them on
* Windows, macOS and Unix alike, whereas backslashes only work on Windows.
use "Results/fig_A1/dataset_with_singles_2000_2018.dta", clear

sort final_educ

* Attach the hours moments, which are stored one row per final_educ (m:1).
merge m:1 final_educ using "Results/fig_A1/final_educ_hours_moments_corrected.dta"
* Discard final_educ rows that exist only in the moments file, then drop the
* merge indicator so later merges can create their own.
drop if _merge==2
drop _merge


***


**kmeans**
* Moments standardized below:
*   avr_hours_50 short_hours_50 sft_50 irreg_50
*   avr_hours_100 short_hours_100 sft_100 irreg_100

* Standardize each moment as (x - mean) / sd, where mean and sd are the values
* -summarize- reports for the data currently in memory. The result is stored
* in <moment>_s. The mean and sd are kept in the scalars the_mean_<tag><pct>
* and the_sd_<tag><pct>, with tags h = avr_hours, s = sft, i = irreg,
* sh = short_hours.
local moment_stems avr_hours sft irreg short_hours
local scalar_tags  h s i sh

forvalues idx = 1/4 {
    local stem : word `idx' of `moment_stems'
    local tag  : word `idx' of `scalar_tags'

    foreach pct in 50 100 {
        summarize `stem'_`pct'
        scalar the_mean_`tag'`pct' = r(mean)
        scalar the_sd_`tag'`pct' = r(sd)
        generate `stem'_`pct'_s = (`stem'_`pct' - the_mean_`tag'`pct') / the_sd_`tag'`pct'
        * Show the summary of the standardized variable as a visual check.
        summarize `stem'_`pct'_s
    }
}



* Write a lookup file with one row per final_educ holding the raw and
* standardized moments. The person-year data in memory is left untouched
* (preserve/restore).
* NOTE(review): this keeps the first row of each final_educ group, which
* presumably is fine because the moments are constant within final_educ --
* confirm against the merged moments file.
preserve
drop if final_educ == .
* Within each final_educ, order by pnr aar and keep only the first row.
bysort final_educ (pnr aar): keep if _n == 1
keep final_educ avr_hours_50 short_hours_50 sft_50 irreg_50 avr_hours_100 short_hours_100 sft_100 irreg_100 avr_hours_50_s short_hours_50_s sft_50_s irreg_50_s avr_hours_100_s short_hours_100_s sft_100_s irreg_100_s

* Forward slashes keep the path valid on Windows, macOS and Unix.
save "Results/fig_A1/final_educ_hours_moments_standardized.dta", replace
restore


* k-means with 4 groups on the standardized short-hours and irregular-hours
* moments. The group assignment is stored in ambition_short_irreg_50, which is
* tabulated and summarized right below.
* NOTE(review): the starting centers are drawn at random with seed 1234, so
* the result presumably depends on the row order of the data in memory; that
* order comes from an earlier -sort final_educ-, which leaves ties in no
* guaranteed order -- confirm the clusters are reproducible across runs.
cluster kmeans short_hours_50_s irreg_50_s, k(4) name(ambition_short_irreg_50) start(krandom(1234))
tabulate ambition_short_irreg_50
tabstat short_hours_50_s irreg_50_s, by(ambition_short_irreg_50)


sort pnr aar

** Keep only those for whom we observe the ambition type **
drop if wage_growth_ambition == .

* Keep only couples where we observe both partners, plus all singles.
* cpl_koen holds koen for partnered rows only (converted to numeric if koen
* is stored as a string); cpl_koen_avg is its mean within couple_id and aar.
* NOTE(review): a mean of exactly 1.5 presumably means koen is coded 1/2 and
* one partner of each code is present in that year -- confirm the coding.
generate cpl_koen = koen if relationship == 1
destring cpl_koen, replace
bysort couple_id aar: egen cpl_koen_avg = mean(cpl_koen)
drop if !((cpl_koen_avg == 1.5 & relationship == 1) | relationship == 0)
drop cpl_koen cpl_koen_avg

sort pnr aar

* Only keep those with non-missing robustness-check ambition types *
* For each robustness classification ambition_<var>: restrict the sample to
* rows where it is observed, re-apply the both-partners filter (dropping rows
* can leave a couple with a single partner), save the result, and restore the
* full sample before the next iteration.
foreach var in short_irreg_50 {
    preserve

    keep if ambition_`var'!=.

    * Keep only couples where we observe both partners, plus all singles.
    * temp2 is koen for partnered rows; temp3 is its mean within couple_id
    * and aar, and only couple-years with a mean of exactly 1.5 are kept.
    gen temp2=koen if relationship==1
    destring temp2, replace
    by couple_id aar, sort: egen temp3=mean(temp2)
    keep if (temp3==1.5 & relationship==1) | relationship==0
    drop temp2 temp3

    sort pnr aar

    *Save*
    * Forward slashes keep the path valid on Windows, macOS and Unix, and
    * avoid the backslash, which Stata treats as an escape character in
    * front of a macro reference.
    save "Results/fig_A1/dataset_with_singles_LFS_`var'_primeage.dta", replace
    restore
}

